

### ESTIMATE HIERARCHICAL IRT MODEL ON CONGRESSIONAL DATA AS NOMINATE
### result.rda is the output of estimate
### hier_scatter.pdf is Figure 12, a scatterplot comparing NOMINATE vs hierIRT.  Code for plot at end of this file.
### All other files are the original DW-NOMINATE data files pulled from Voteview, which are used as data (after substantial processing below)

## CODE SETUP
#1. Build a list of all groups across all 30 Houses first.  Get G size.
#2. Tabulate number of votes J_n from each session.
#3. Subset only votes from a particular house
#4. Loop over each legislator for that house (i), so i[l] and g[l] are known implicitly
#	 -Can write out g[i] and z[i] for each legislator before looping across votes
#	 -Write out i[l], j[l], y[l] for each votes

rm(list = ls())
#library(hierIRT)
library(emIRT)
set.seed(1)

gethouses <- 1:110

## Read vote-oriented file for vote indices
tmp <- read.fwf("HT01110_PRES_1ST.VT3", widths=c(4,5))
vote.index <- table(tmp[,1])[gethouses]
vote.index <- cbind(Jn=vote.index, session=1:length(gethouses), house = gethouses, Jprior=0)
for(i in 2:nrow(vote.index)) vote.index[i,"Jprior"] = sum(vote.index[1:(i-1),"Jn"])

## Read legislator-oriented file to generate group list
## legis is the one used to store and index results
## legis.all keeps the legislators in other sessions for subsetting data
legis.all <- read.fwf("H01110_PRES_1ST.VT3", widths=c(4,6,-19,11))
colnames(legis.all) = c("house","id","name")
legis <- legis.all[legis.all$house %in% gethouses,]
legis$z <- 0
legis$i <- 1:nrow(legis)
legis$groupsize <- 0

## Generate group list
## Note there is some duplication in names, so we construct list manually off id instead
## Manual extraction of groups
groups <- data.frame(id=unique(legis$id), name=NA, start.z0=0, start.z1=0)
for(i in 1:nrow(groups)) groups$name[i] <- as.character(legis[legis$id == groups$id[i],"name"][1])

## Starting values from NOMINATE
starts <- read.fwf("HL01110B21_PRES.DAT", widths=c(4,6,-19,11,7))
colnames(starts) <- c("house","id","name","startval")
starts <- starts[starts$house %in% gethouses,]
for(i in 1:nrow(groups)){ 
	coords <- starts[starts$id == groups$id[i],"startval"]
	groups$start.z0[i] <- coords[1]
	if(length(coords) > 4) groups$start.z1[i] <- coords[2] - coords[1]
}

## Link each legislator to group
legis$g=NA
for(i in 1:nrow(legis)) legis$g[i] <- which(groups$id== legis$id[i])

## Generate covariate vector depending on start time
for(i in 1:nrow(groups)){
  index <- which(legis$id %in% groups$id[i])
  legis$z[index] <- 0:(length(index)-1)
  legis$groupsize[index] <- length(index)
}

## Constant ideal point constraint for under 4 legislatures if desired
legis$z[which(legis$groupsize < 5)] <- 0

## Get to the point of reading individual congresses
legis.per.house <- table(legis.all$house)

## Caculate number of i legislators to skip in each house, Iprior
tmp = as.numeric(legis.per.house[gethouses])
Iprior.tmp <- rep(0,length(gethouses))
for(i in 2:length(tmp)) Iprior.tmp[i] = sum(tmp[1:(i-1)])
vote.index <- cbind(vote.index, Iprior=Iprior.tmp)

## Generate empty full data vector representation, using max size
y.vec <- numeric(2^25)
i.vec <- numeric(2^25)
j.vec <- numeric(2^25)


#####  BIG LOOP ACROSS HOUSES HERE ######

counter = 0	 #Counts across NL observations
for(whichhouse in 1:length(gethouses)){
#whichhouse=2
#gethouses[whichhouse]

## This is the vote file for a particular house
## Proceed by looping over, taking care to add Jprior votes and Iprior legislators from before

Jn = vote.index[which(vote.index[,"house"]==gethouses[whichhouse]) , "Jn"]
rcmat <- read.fwf("H01110_PRES_1ST.VT3", widths=c(-40,rep(1, Jn)),
	n=legis.per.house[gethouses[whichhouse]],
	skip=sum(legis.per.house[1:(gethouses[whichhouse]-1)]))

rcmat <- as.matrix(rcmat)
Jprior = vote.index[which(vote.index[,"house"]==gethouses[whichhouse]) , "Jprior"]
Iprior = vote.index[which(vote.index[,"house"]==gethouses[whichhouse]) , "Iprior"]

## Recodes for easier extraction of y
rcmat[rcmat %in% c(7,8,9,0)] <- NA
rcmat[rcmat %in% c(4,5,6)] <- -1		# nay code
rcmat[rcmat %in% c(1,2,3)] <- 1		# yea code

for(i in 1:nrow(rcmat)){
  for(j in 1:ncol(rcmat)){

	if(!is.na(rcmat[i,j])){

		counter <- counter + 1
		y.vec[counter] <- rcmat[i,j]
		i.vec[counter] <- i + Iprior
		j.vec[counter] <- j + Jprior
	
	}	#end if()

  } #end for(j in 1:ncol(tmp)
}	#end for(i in 1:nrow(tmp))

	## BIG LOOP ENDS
	cat("\n\tHouse", gethouses[whichhouse], "complete...")
	flush.console()

## BIG LOOP ENDS
} # end for(whichhouse in 1:length(gethouses))

### Shorten vectors to length NL
y.vec <- y.vec[1:counter]
i.vec <- i.vec[1:counter]
j.vec <- j.vec[1:counter]

## Remember to decrement i[], j[], and g[] for 0 baseline, construct zvec
i.vec = i.vec - 1
j.vec = j.vec - 1
g.vec = legis$g
g.vec = g.vec - 1
z.vec = cbind(1,legis$z)

### Wrap data input
data.in <- list(y=matrix(y.vec, ncol=1),
		z=z.vec,
		g=matrix(g.vec, ncol=1),
		i=matrix(i.vec, ncol=1),
		j=matrix(j.vec, ncol=1),
		ND=ncol(z.vec),
		NG=max(g.vec) + 1,
		NJ=max(j.vec) + 1,
		NI=max(i.vec) + 1,
		NL=length(y.vec))


## Set priors: This setup assumes no noise, as in DW-NOMINATE
priors <- list(beta.mu=matrix(c(0,0),2,1),
		beta.sigma=diag(2)*25,
		gamma.mu=matrix(rep(0,ncol(z.vec)),ncol(z.vec),1),
		gamma.sigma=diag(ncol(z.vec))*25,
		sigma.v=matrix(100000000,1,1),
		sigma.s=matrix(0.00000001,1,1))


### Use DW-NOMINATE starts
#gamma.start <- matrix(0, nrow=data.in$NG, ncol=ncol(z.vec))
#gamma.start[,1] <- rnorm(data.in$NG)
gamma.start <- as.matrix(groups[,c("start.z0","start.z1")])
cur <- list(alpha=matrix(rnorm(data.in$NJ), nrow=data.in$NJ, ncol=1),
		beta=matrix(rnorm(data.in$NJ), nrow=data.in$NJ, ncol=1),
		gamma=gamma.start,
		eta=matrix(0, nrow=data.in$NI, ncol=1),
	    	sigma = matrix(0.000000000001, data.in$NG, 1))

#save(data.in,cur,priors, file="input.rda")

time <- system.time({
    lout <- hierIRT(.data = data.in,
                    .starts = cur,
                    .priors = priors,
                    .control = {list(
                    threads = 8,
                    verbose = TRUE,
                    thresh = 1e-4,
		    maxit=200,
		     checkfreq=1
                        )})
})

save(time, legis, lout, file="result.rda")



#### GENERATE PLOT
library(foreign)
setwd("~/Documents/working/houtest")
load("result.rda")

final <- cbind(legis,idealpt.hier=lout$means$x_implied)

## Extract DW-NOMINATE file, merge on house and id
nomres <- read.dta("House_dwnom_105_to_110_PRES.DTA")
res <- merge(final, nomres, by.x=c("house","id"), by.y=c("cong","idno"),
	all.x=TRUE,all.y=FALSE)

## Z transform
res$dwnom1d <- (res$dwnom1_110 - mean(res$dwnom1_110,na.rm=TRUE))/sd(res$dwnom1_110,na.rm=TRUE)
res$idealpt.hier <- (res$idealpt.hier - mean(res$idealpt.hier))/sd((res$idealpt.hier))

## Correlation
cor(res$idealpt.hier,res$dwnom1d,use="pairwise")

pdf("hier_scatter.pdf")
plot(res$idealpt.hier, res$dwnom1d, xlab="Hierarchical VI Estimate",
	ylab="DW-NOMINATE Estimate", main="1st-110th House",
	xlim=c(-3,3), ylim=c(-3,3), type="n", bty="n",
	 cex.lab=1.5, cex.axis=1.5, cex.main=2)
abline(a=0, b=1, lwd=4, col="grey")
points(res$idealpt.hier, res$dwnom1d, pch=16, cex=0.7) 
text(2,-2,expression(paste(rho," = 0.97")), cex=2)
dev.off()






